# encoding: utf-8
"""
@author: 夏洛
@QQ: 1972386194
@file: 02-提取淘宝.py
"""

# 使用正则提取淘宝数据

import re
# Read the saved Taobao page. The context manager closes the file handle
# as soon as the content has been read instead of leaving it to the GC.
with open('taobao.html', 'r', encoding='utf-8') as _fh:
    html = _fh.read()

# Capture whatever is assigned to `g_page_config` inside the <script> tag,
# up to the `;` that precedes `g_srp_loadCss`.
# A raw string is used so `\s` reaches the regex engine without triggering
# Python's invalid-escape-sequence warning.
regex = re.compile(r'<script>\s*g_page_config = (.*?);\s*g_srp_loadCss')

# Collect the captured group of every match found in the page.
lst_res = regex.findall(html)
# print(lst_res)


from pyquery import PyQuery as pq
import requests
# Download the gallery index page. requests has no default timeout, so an
# explicit one (10 seconds) keeps the script from hanging forever on a
# stalled connection.
res = requests.get('https://pic.netbian.com/4kmeinv/index.html', timeout=10)
# Decode the response body as GBK rather than the encoding requests guessed.
res.encoding = 'gbk'

# Narrow the document down to the `div.slist` element(s).
qq = pq(res.text)
qqq = qq('div.slist')

# Pull every <img ...> tag out of the serialized selection.
pattern = '<img[^>]*>'
result1 = re.findall(pattern, str(qqq))

# Compile once outside the loop; the same pattern is applied to every tag.
src_regex = re.compile('src="(.*?)"')
for i in result1:
    src_match = src_regex.search(i)
    if src_match is None:
        # Tag carries no src="..." attribute; skip it instead of crashing
        # with AttributeError on `None.group`.
        continue
    print(src_match.group(1))

